package com.atistudios.app.data.utils.language;

import com.atistudios.app.data.model.word.WordWithRangeModel;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.d0.q;
import kotlin.i0.d.i;
import kotlin.i0.d.n;
import kotlin.p0.j;
import kotlin.p0.v;
import kotlin.r;

@Metadata(bv = {1, 0, 3}, d1 = {"\u0000\f\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0004\u0018\u0000 \u00042\u00020\u0001:\u0001\u0004B\u0007¢\u0006\u0004\b\u0002\u0010\u0003¨\u0006\u0005"}, d2 = {"Lcom/atistudios/app/data/utils/language/WordPhraseTokenizer;", "", "<init>", "()V", "Companion", "app_naio_viRelease"}, k = 1, mv = {1, 5, 1})
/* loaded from: classes.dex */
public final class WordPhraseTokenizer {
    /** Five-underscore marker; the Companion methods compare text against this same literal. */
    public static final String COMPLETABLE_TOKEN = "_____";

    /* renamed from: Companion, reason: from kotlin metadata */
    /** Single Companion instance, created through the synthetic constructor with a null marker. */
    public static final Companion INSTANCE = new Companion(null);
    // The `j` fields below wrap java.util.regex.Pattern objects compiled in the static initializer.
    // Matches one or more whitespace characters at the end of the input: (?u)[\s]+$
    private static final j anySpaceRegex;
    // Matches input made up only of dash punctuation: (?u)^[\p{Pd}]+$
    private static final j dashRegex;
    // Matches input made up only of two or more underscores: (?u)^_{2,}+$
    private static final j placeholderTokenRegex;
    // Matches input made up only of punctuation: (?u)^[\p{P}]+$
    private static final j punctuationRegex;
    // Matches input made up only of punctuation, separators, marks, control/other chars or whitespace:
    // (?u)^[\p{P}\p{Z}\p{M}\p{C}\s]+$
    private static final j specialCharsRegex;
    // Group 1: run of characters at which no 2+ underscore run starts; group 2: run of 2+ underscores.
    // Both groups are optional: (?u)((?:(?!_{2,}).)+)?(_{2,})?
    private static final Pattern splitPlaceholderTokenRegex;
    // Fixed list of five single-character strings (accent/degree/tilde-like symbols) set in the static initializer.
    private static final List<String> unmatchedChars;
    // Matches input made up only of whitespace: (?u)^[\s]+$
    private static final j whitespaceRegex;

    @Metadata(bv = {1, 0, 3}, d1 = {"\u0000R\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\u000b\n\u0002\b\u0005\n\u0002\u0010\b\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\t\b\u0086\u0003\u0018\u00002\u00020\u0001B\t\b\u0002¢\u0006\u0004\b*\u0010+J\u0017\u0010\u0005\u001a\u00020\u00042\u0006\u0010\u0003\u001a\u00020\u0002H\u0002¢\u0006\u0004\b\u0005\u0010\u0006J\u0017\u0010\b\u001a\u00020\u00042\u0006\u0010\u0007\u001a\u00020\u0002H\u0002¢\u0006\u0004\b\b\u0010\u0006J1\u0010\u000f\u001a\u0014\u0012\n\u0012\b\u0012\u0004\u0012\u00020\u000e0\r\u0012\u0004\u0012\u00020\n0\f2\u0006\u0010\t\u001a\u00020\u00022\u0006\u0010\u000b\u001a\u00020\nH\u0002¢\u0006\u0004\b\u000f\u0010\u0010J#\u0010\u0015\u001a\b\u0012\u0004\u0012\u00020\u00140\r2\u0006\u0010\u0011\u001a\u00020\u00022\u0006\u0010\u0013\u001a\u00020\u0012¢\u0006\u0004\b\u0015\u0010\u0016J#\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u00020\u00172\u0006\u0010\u0011\u001a\u00020\u00022\u0006\u0010\u0013\u001a\u00020\u0012¢\u0006\u0004\b\u0018\u0010\u0019R\u0016\u0010\u001a\u001a\u00020\u00028\u0006@\u0006X\u0086T¢\u0006\u0006\n\u0004\b\u001a\u0010\u001bR\u0016\u0010\u001d\u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b\u001d\u0010\u001eR\u0016\u0010\u001f\u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b\u001f\u0010\u001eR\u0016\u0010 \u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b \u0010\u001eR\u0016\u0010!\u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b!\u0010\u001eR\u0016\u0010\"\u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b\"\u0010\u001eR\u001e\u0010%\u001a\n 
$*\u0004\u0018\u00010#0#8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b%\u0010&R\u001c\u0010'\u001a\b\u0012\u0004\u0012\u00020\u00020\r8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b'\u0010(R\u0016\u0010)\u001a\u00020\u001c8\u0002@\u0002X\u0082\u0004¢\u0006\u0006\n\u0004\b)\u0010\u001e¨\u0006,"}, d2 = {"Lcom/atistudios/app/data/utils/language/WordPhraseTokenizer$Companion;", "", "", "text", "", "detectPlaceholders", "(Ljava/lang/String;)Z", "textToCheckForPlaceholder", "isTextPlaceholder", "detectedTextWithPlaceholder", "", "lastLocation", "Lkotlin/r;", "", "Lcom/atistudios/app/data/model/word/WordWithRangeModel;", "splitTokenWithPlaceholder", "(Ljava/lang/String;I)Lkotlin/r;", "inputTextResource", "Ljava/util/Locale;", "languageLocale", "Lcom/atistudios/app/data/model/word/WordTokenWithRangeModel;", "tokenizeTextResourceInWordsByLanguage", "(Ljava/lang/String;Ljava/util/Locale;)Ljava/util/List;", "Ljava/util/ArrayList;", "tokenizeTextResourceInChars", "(Ljava/lang/String;Ljava/util/Locale;)Ljava/util/ArrayList;", "COMPLETABLE_TOKEN", "Ljava/lang/String;", "Lkotlin/p0/j;", "anySpaceRegex", "Lkotlin/p0/j;", "dashRegex", "placeholderTokenRegex", "punctuationRegex", "specialCharsRegex", "Ljava/util/regex/Pattern;", "kotlin.jvm.PlatformType", "splitPlaceholderTokenRegex", "Ljava/util/regex/Pattern;", "unmatchedChars", "Ljava/util/List;", "whitespaceRegex", "<init>", "()V", "app_naio_viRelease"}, k = 1, mv = {1, 5, 1})
    /* loaded from: classes.dex */
    public static final class Companion {
        // Private no-op constructor; reached only through the synthetic constructor of this class.
        private Companion() {
        }

        // Synthetic constructor: the marker argument is never read and only distinguishes this
        // overload from the private no-arg constructor it delegates to.
        public /* synthetic */ Companion(i iVar) {
            this();
        }

        /**
         * Reports whether {@code text} involves the {@code "_____"} marker.
         *
         * <p>NOTE(review): {@code v.N} and {@code n.a} are obfuscated Kotlin runtime helpers, presumably
         * {@code String.contains} and an equality check; confirm against the Kotlin runtime mapping.
         *
         * @param text the text to inspect
         * @return {@code true} if either helper reports a match for {@code "_____"}
         */
        private final boolean detectPlaceholders(String text) {
            if (v.N(text, "_____", false, 2, null)) {
                return true;
            }
            return n.a(text, "_____");
        }

        /**
         * Checks the given text against {@code placeholderTokenRegex} (pattern {@code ^_{2,}+$}).
         *
         * <p>NOTE(review): {@code j.b} is an obfuscated regex method, presumably a full-match test; confirm.
         *
         * @param textToCheckForPlaceholder the text to test
         * @return the result reported by the regex wrapper for this text
         */
        private final boolean isTextPlaceholder(String textToCheckForPlaceholder) {
            final j placeholderRegex = WordPhraseTokenizer.placeholderTokenRegex;
            return placeholderRegex.b(textToCheckForPlaceholder);
        }

        /**
         * Splits a token containing a placeholder into ordered, positioned pieces.
         *
         * <p>The input is first normalized through {@code v.S0} (obfuscated; presumably Kotlin's
         * {@code trim} — confirm). If the normalized text equals {@code "_____"} it is emitted as a
         * single piece. Otherwise the text is scanned with {@code splitPlaceholderTokenRegex} and every
         * non-empty capture group becomes a piece. When the second group equals {@code "_____"}, a
         * one-character {@code " "} piece is emitted immediately before it.
         *
         * <p>Each piece is built as {@code new WordWithRangeModel(text, location, text.length())}, and the
         * running location advances by the length of every emitted piece.
         *
         * @param detectedTextWithPlaceholder the text to split; must not be {@code null}
         * @param lastLocation location given to the first emitted piece
         * @return a pair holding the pieces in emission order and the location just past the last piece
         * @throws NullPointerException if {@code detectedTextWithPlaceholder} is {@code null}
         */
        private final r<List<WordWithRangeModel>, Integer> splitTokenWithPlaceholder(String detectedTextWithPlaceholder, int lastLocation) {
            Objects.requireNonNull(detectedTextWithPlaceholder, "null cannot be cast to non-null type kotlin.CharSequence");
            final String normalized = v.S0(detectedTextWithPlaceholder).toString();
            final List<WordWithRangeModel> pieces = new ArrayList<>();
            // Work on a local cursor instead of reassigning the parameter.
            int cursor = lastLocation;

            if (n.a(normalized, "_____")) {
                // The whole token is the placeholder: emit it as one piece.
                pieces.add(new WordWithRangeModel(normalized, cursor, normalized.length()));
                cursor += normalized.length();
                return new r<>(pieces, Integer.valueOf(cursor));
            }

            final Matcher matcher = WordPhraseTokenizer.splitPlaceholderTokenRegex.matcher(normalized);
            while (matcher.find()) {
                final int groupCount = matcher.groupCount();
                for (int groupIndex = 1; groupIndex <= groupCount; groupIndex++) {
                    final String group = matcher.group(groupIndex);
                    if (groupIndex == 2 && n.a(group, "_____")) {
                        // Separator piece emitted in front of a five-underscore placeholder.
                        pieces.add(new WordWithRangeModel(" ", cursor, 1));
                        cursor++;
                    }
                    // Groups that did not participate (null) or matched nothing are skipped.
                    if (group != null && !group.isEmpty()) {
                        pieces.add(new WordWithRangeModel(group, cursor, group.length()));
                        cursor += group.length();
                    }
                }
            }
            return new r<>(pieces, Integer.valueOf(cursor));
        }

        /**
         * Splits the text into the units delimited by the character-boundary {@link BreakIterator} of the
         * given locale, dropping every unit that equals one of a fixed list of excluded strings.
         *
         * <p>NOTE(review): {@code n.e} is an obfuscated parameter check, presumably Kotlin's non-null
         * parameter intrinsic; confirm which exception it raises for {@code null} arguments.
         *
         * @param inputTextResource the text to split
         * @param languageLocale locale used to obtain the character-boundary iterator
         * @return the retained units, in the order they occur in the text
         */
        public final ArrayList<String> tokenizeTextResourceInChars(String inputTextResource, Locale languageLocale) {
            n.e(inputTextResource, "inputTextResource");
            n.e(languageLocale, "languageLocale");
            // Units that are filtered out of the result when they appear as a boundary-delimited unit.
            final List<String> excludedUnits = q.k("\u202c", "\u202b", "\ud83d", "�", "‼", "【", "】", "《", "᙭", "\u200c", "\u202a", "⁉", "⃣", "》", "「", "〰", "ٟ", "༺", "༻", "\uf610", "￼");
            final ArrayList<String> units = new ArrayList<>();
            final BreakIterator boundaries = BreakIterator.getCharacterInstance(languageLocale);
            boundaries.setText(inputTextResource);

            // Standard BreakIterator walk: each [start, end) pair is one boundary-delimited unit.
            int start = boundaries.first();
            for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
                final String unit = inputTextResource.substring(start, end);
                if (!excludedUnits.contains(unit)) {
                    units.add(unit);
                }
                start = end;
            }
            return units;
        }

        /* JADX WARN: Removed duplicated region for block: B:33:0x00ff  */
        /* JADX WARN: Removed duplicated region for block: B:35:0x0104  */
        /* JADX WARN: Removed duplicated region for block: B:52:0x0199  */
        /* JADX WARN: Removed duplicated region for block: B:54:0x019e  */
        /* JADX WARN: Removed duplicated region for block: B:61:0x01c7  */
        /* JADX WARN: Removed duplicated region for block: B:63:0x01ca  */
        /* JADX WARN: Removed duplicated region for block: B:65:0x019b  */
        /* JADX WARN: Removed duplicated region for block: B:66:0x0101  */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Decompiler stub: the original body (800 instructions per the decompiler note) could not be
         * recovered, so this version throws unconditionally and performs no tokenization.
         *
         * @param r24 first argument, a {@code String}; its use is not visible in this stub
         * @param r25 second argument, a {@code Locale}; its use is not visible in this stub
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        public final java.util.List<com.atistudios.app.data.model.word.WordTokenWithRangeModel> tokenizeTextResourceInWordsByLanguage(java.lang.String r24, java.util.Locale r25) {
            /*
                Method dump skipped, instructions count: 800
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: com.atistudios.app.data.utils.language.WordPhraseTokenizer.Companion.tokenizeTextResourceInWordsByLanguage(java.lang.String, java.util.Locale):java.util.List");
        }
    }

    // Initializes the shared character list and compiles every pattern once, when the class is loaded.
    // All patterns carry both the inline (?u) flag and Pattern.UNICODE_CASE, as in the original source.
    static {
        unmatchedChars = q.k("`", "´", "΄", "°", "～");
        // Entire input is punctuation, separators, marks, "other" characters or whitespace.
        specialCharsRegex = new j(Pattern.compile("(?u)^[\\p{P}\\p{Z}\\p{M}\\p{C}\\s]+$", Pattern.UNICODE_CASE));
        // Entire input is punctuation.
        punctuationRegex = new j(Pattern.compile("(?u)^[\\p{P}]+$", Pattern.UNICODE_CASE));
        // Entire input is whitespace.
        whitespaceRegex = new j(Pattern.compile("(?u)^[\\s]+$", Pattern.UNICODE_CASE));
        // Entire input is two or more underscores.
        placeholderTokenRegex = new j(Pattern.compile("(?u)^_{2,}+$", Pattern.UNICODE_CASE));
        // Entire input is dash punctuation.
        dashRegex = new j(Pattern.compile("(?u)^[\\p{Pd}]+$", Pattern.UNICODE_CASE));
        // Input ends with one or more whitespace characters.
        anySpaceRegex = new j(Pattern.compile("(?u)[\\s]+$", Pattern.UNICODE_CASE));
        // Group 1: text up to the next run of 2+ underscores; group 2: that underscore run.
        splitPlaceholderTokenRegex = Pattern.compile("(?u)((?:(?!_{2,}).)+)?(_{2,})?", Pattern.UNICODE_CASE);
    }
}
